###################################################################################################
#_______________________________ Number of Exchanges at Best Price _______________________________#
###################################################################################################

# This script produces the figures and regression outputs that use the
# "numexatbbo_top5_2015.csv" dataset.
#
# "numexatbbo_top5_2015.csv" is a symbol-date-#_of_exchanges level dataset with a row
# for how many milliseconds (ms) a given number of exchanges were at the BBO
# (computed among the top 8 exchanges) for symbol-dates (e.g. the number of ms
# 4 exchanges were at the BBO for SPY on May 8, 2011).
#
# This dataset has already been merged with the "Symbol_Universe_2015" dataset
# and only contains observations for the sample symbols. We also use the
# "f_Sample_T100" flag from the symbol universe file to identify the Top 100
# symbols and "f_NYSE_Listed" to split the data between NYSE and non-NYSE listed
# symbols.
#
# The output of the script is as follows:
# 1. Tables containing the percent of ms a number of exchanges were at the BBO
#	for NYSE and non-NYSE listed securities
# 1. Bar graphs displaying the percentage of ms a number of exchanges were at the BBO
#	in 2015. One graph for each of the following settings
#		i) Top 5 exchanges, NYSE-listed symbols
#		ii) Top 5 exchanges, non-NYSE listed symbols

################################################################################
#_______________________________ Load Libraries _______________________________#
################################################################################

library(ggplot2)
library(dplyr)
library(data.table)

# Suppress warnings
options(warn=-1)

#############################################################################
#_______________________________ Directories _______________________________#
#############################################################################

home.dir <- "path-to-data-appendix/3_Analysis_of_TAQ_data"
data.dir <- file.path(home.dir, "WRDS_Server/Output")
fig.dir <- file.path(home.dir, "Final_Output/Figures")
table.dir <- file.path(home.dir, "Final_Output/Tables")

###########################################################################
#_______________________________ Functions _______________________________#
###########################################################################

# plot.bargraph
# Plot bar graphs for the percentage a number of exchanges were at the BBO
# over 2015.
#
# @returns:
#   bar graph of the percentage of ms a number of exchanges were
#   at the BBO.
#
# @args (for reproducing results):
#   data: dataframe, this is a dataset containing the percentages
#     already aggregated over symbols and dates.
#   x: string, the name of the x variable, in this case the number of
#     possible exchanges at the best price.
#   y: string, the name of the y variable, in this case the percentage of
#     ms a number of exchanges are at the BBO
#   title: string, title of the plot
#   xlab: string, the label for the x-axis
#   ylab: string, the label for the y-axis
#   figure_path: string, the path and the name of the output figure
#   breaks: vector, the ticks for the x-axis. Makes sure a tick is placed
#     for every integer.
#   xlim: vector, the upper and lower limits for the x-axis
#   ylim: vector, the upper and lower limits for the y-axis
#   axis_size: float, size of the axis
#
plot.bargraph <- function(data,
                          x = 'num_exAtBestPrice',
                          y = 'percent',
                          title = "", xlab = "Number of Exchanges at the Best Price", ylab = "Percent of Time (%)",
                          figure_path = "",
                          breaks = seq(0, 10, 1), xlim = c(0.5, 8.5), ylim = c(0, 100),
                          axis_size = 35) {

  g <- ggplot(data, aes_string(x = x, y = y)) + geom_bar(stat = "identity", position = "dodge") +
    scale_x_continuous(breaks = breaks, limits = xlim) +
    scale_y_continuous(limits = ylim) +
    labs(x = xlab, y = ylab, title = title) +
    theme(axis.text.x = element_text(size = axis_size), axis.text.y = element_text(size = axis_size), axis.title.x = element_text(size = axis_size),
          axis.title.y = element_text(size = axis_size), title = element_text(size = 27))

  ggsave(file= figure_path,
         plot = g, width = 12, height= 8.8, dpi=120)
}

#################################################################################
#_______________________________ Top 5 exchanges _______________________________#
#################################################################################

sink(file.path(home.dir, "Final_Output/Logs/Num_Exchanges_at_BBO_log.txt"))

# Load Number of Exchanges at BBO dataset
numExAtBBO <- data.table::fread(file.path(data.dir, "numexatBBO_top5_shares_2015.csv"))
# Date in date format
numExAtBBO$Date <- as.Date(as.character(numExAtBBO$DATE), "%Y-%m-%d")
# Filter, only Top 100
numExAtBBO_T100 <- numExAtBBO[numExAtBBO$f_Sample_T100 == 1,]

# Split into NYSE Listed and Non NYSE Listed
numExAtBBO_T100_NYSE <- numExAtBBO_T100[numExAtBBO_T100$f_NYSE_Listed == 1,]
numExAtBBO_T100_non_NYSE <- numExAtBBO_T100[numExAtBBO_T100$f_NYSE_Listed == 0,]

# Constant for decimal to percentage conversion
decimal_to_percent <- 100

# NYSE LISTED

# Group by number of exchanges at best price.
# Sum ms over all top 100 symbols and the year.
numExAtBBO_annual_NYSE <- numExAtBBO_T100_NYSE %>%
                          dplyr::group_by(num_exAtBestPrice) %>%
                          dplyr::summarise(count_both_main5_annual = sum(as.numeric(count_both_main5), na.rm = TRUE)) %>%
                          dplyr::mutate(total_ms_at_BBO = sum(count_both_main5_annual))

# Compute percentages
numExAtBBO_annual_NYSE$percent_BBO_Top5 <- (numExAtBBO_annual_NYSE$count_both_main5_annual / numExAtBBO_annual_NYSE$total_ms_at_BBO) * decimal_to_percent
numExAtBBO_annual_NYSE <- data.frame(numExAtBBO_annual_NYSE)

# NON-NYSE LISTED

# Group by number of exchanges at best price.
# Sum ms over all top 100 symbols and the year.
numExAtBBO_annual_non_NYSE <- numExAtBBO_T100_non_NYSE %>%
                            dplyr::group_by(num_exAtBestPrice) %>%
                            dplyr::summarise(count_both_main5_annual = sum(as.numeric(count_both_main5), na.rm = TRUE)) %>%
                            dplyr::mutate(total_ms_at_BBO = sum(count_both_main5_annual))

# Compute percentages
numExAtBBO_annual_non_NYSE$percent_BBO_Top5 <- (numExAtBBO_annual_non_NYSE$count_both_main5_annual / numExAtBBO_annual_non_NYSE$total_ms_at_BBO) * decimal_to_percent
numExAtBBO_annual_non_NYSE <- data.frame(numExAtBBO_annual_non_NYSE)

# Export these tables
data.table::fwrite(numExAtBBO_annual_NYSE, file.path(table.dir, "numexatBBO_top5_BBO_2015_NYSE.csv"))
data.table::fwrite(numExAtBBO_annual_non_NYSE, file.path(table.dir, "numexatBBO_top5_BBO_2015_non_NYSE.csv"))

###################################################################################################
#_______________________________ Percentage only 1 exchange at BBO _______________________________#
###################################################################################################

cat("*** Percent of milliseconds only 1 exchange at the BBO ***", "\n")
cat("\n")

# Two digit precision when we print numbers
two_digit_precision <- 2

# Percentage of ms 1 exchange (out of Top 5) is at the BBO for NYSE-Listed symbols
NYSE_pct_1_ex_Top5 <- numExAtBBO_annual_NYSE[numExAtBBO_annual_NYSE$num_exAtBestPrice == 1,]$percent_BBO_Top5
# Round to 2 decimal places
NYSE_pct_1_ex_Top5 <- format(round(NYSE_pct_1_ex_Top5, two_digit_precision), two_digit_precision)

print(paste0("For NYSE-Listed symbols, only ", NYSE_pct_1_ex_Top5, "% of ms have only 1 exchange (out of Top 5) at the BBO."))

# Percentage of ms 1 exchange (out of Top 5) is at the BBO for non NYSE-Listed symbols
non_NYSE_pct_1_ex_Top5 <- numExAtBBO_annual_non_NYSE[numExAtBBO_annual_non_NYSE$num_exAtBestPrice == 1,]$percent_BBO_Top5
# Round to 2 decimal places
non_NYSE_pct_1_ex_Top5 <- format(round(non_NYSE_pct_1_ex_Top5, two_digit_precision), two_digit_precision)

print(paste0("For non NYSE-Listed symbols, only ", non_NYSE_pct_1_ex_Top5, "% of ms have only 1 exchange (out of Top 5) at the BBO."))
cat("\n")

#################################################################################################
#_______________________________ Percentage All exchanges at BBO _______________________________#
#################################################################################################

cat("*** Percent of milliseconds all exchanges at the BBO ***", "\n")
cat("\n")

# % of ms all exchanges are at the best bid or offer

# Top 5
NYSE_pct_all_ex_BBO_Top5 <- numExAtBBO_annual_NYSE[numExAtBBO_annual_NYSE$num_exAtBestPrice == 5,]$percent_BBO_Top5
# Round to 2 decimal places
NYSE_pct_all_ex_BBO_Top5 <- format(round(NYSE_pct_all_ex_BBO_Top5, two_digit_precision), two_digit_precision)

print(paste0("For NYSE-Listed symbols, all exchanges (out of Top 5) are at the best bid or offer for ", NYSE_pct_all_ex_BBO_Top5, "% of ms."))

# Top 5
non_NYSE_pct_all_ex_BBO_Top5 <- numExAtBBO_annual_non_NYSE[numExAtBBO_annual_non_NYSE$num_exAtBestPrice == 4,]$percent_BBO_Top5
# Round to 2 decimal places
non_NYSE_pct_all_ex_BBO_Top5 <- format(round(non_NYSE_pct_all_ex_BBO_Top5, two_digit_precision), two_digit_precision)

print(paste0("For non NYSE-Listed symbols, all exchanges (out of Top 4) are at the best bid or offer for ", non_NYSE_pct_all_ex_BBO_Top5, "% of ms."))
cat("\n")

################################################################################
#_______________________________ Plot Bar Graph _______________________________#
################################################################################

# NYSE Listed (Top 5)
plot.bargraph(numExAtBBO_annual_NYSE,
              x = "num_exAtBestPrice", y = "percent_BBO_Top5",
              figure_path = file.path(fig.dir,"numExAtBBO_T100_NYSE_Top5_BBO.png"), xlim = c(0.5, 5.5), ylim = c(0, 100))

# Non-NYSE listed (Top 5)
plot.bargraph(numExAtBBO_annual_non_NYSE,
              x = "num_exAtBestPrice", y = "percent_BBO_Top5",
              figure_path = file.path(fig.dir,"numExAtBBO_T100_non_NYSE_Top5_BBO.png"), xlim = c(0.5, 4.5), ylim = c(0, 100))

sink()
